{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.utils.data as data\n",
    "import torchvision.transforms as transforms\n",
    "import numpy as np\n",
    "from pytorch_nsynth_lib.nsynth import NSynth\n",
    "from IPython.display import Audio\n",
    "\n",
    "import librosa\n",
    "import librosa.display\n",
    "import phase_operation\n",
    "from tqdm import tqdm\n",
    "import h5py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spec_ops as spec_ops\n",
    "import phase_operation as phase_op\n",
    "import spectrograms_helper as spec_helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the HDF5 output file in write mode ('w' creates the file and\n",
    "# truncates any existing one). The handle is kept in `train_data` so that a\n",
    "# later cell can store the extracted features in it.\n",
    "train_data = h5py.File(name='../data/Nsynth_melspec_IF_pitch.hdf5', mode='w')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Audio samples are loaded as an int16 numpy array; dividing by the largest\n",
    "# int16 value rescales the intensities to floats in roughly [-1, 1].\n",
    "toFloat = transforms.Lambda(lambda wav: wav / np.iinfo(np.int16).max)\n",
    "\n",
    "# Request instrument_family and pitch as the categorical fields, and\n",
    "# blacklist the \"string\" instruments.\n",
    "nsynth_options = dict(\n",
    "    transform=toFloat,\n",
    "    blacklist_pattern=[\"string\"],\n",
    "    categorical_field_list=[\"instrument_family\", \"pitch\"],\n",
    ")\n",
    "dataset = NSynth(\"../data/nsynth/nsynth-train\", **nsynth_options)\n",
    "\n",
    "# One clip per batch, visited in shuffled order.\n",
    "loader = data.DataLoader(dataset=dataset, batch_size=1, shuffle=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def expand(mat):\n",
    "    \"\"\"Append two copies of column 125 of ``mat`` along axis 1.\n",
    "\n",
    "    For a 2-D input with exactly 126 columns this repeats the last column\n",
    "    twice, producing 128 columns.\n",
    "\n",
    "    Args:\n",
    "        mat: array with at least two dimensions and at least 126 entries\n",
    "            along axis 1 (an IndexError is raised otherwise).\n",
    "\n",
    "    Returns:\n",
    "        A new array made of ``mat`` followed by the two repeated columns.\n",
    "    \"\"\"\n",
    "    edge = mat[:, 125][:, np.newaxis]\n",
    "    return np.concatenate((mat, edge, edge), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-example accumulators. spec_list and IF_list stay empty: only the pitch\n",
    "# and the two mel-scaled features are collected by the loop below.\n",
    "spec_list = []\n",
    "pitch_list = []\n",
    "IF_list = []\n",
    "mel_spec_list = []\n",
    "mel_IF_list = []\n",
    "\n",
    "pitch_set = set()  # distinct pitch values that passed the filter\n",
    "count = 0          # number of examples kept so far\n",
    "\n",
    "for samples, instrument_family, pitch, targets in loader:\n",
    "    # Replace the unpacked pitch with the value stored in the targets dict.\n",
    "    pitch = targets['pitch'].data.numpy()[0]\n",
    "\n",
    "    # Keep only pitches inside the closed interval [24, 84].\n",
    "    if not (24 <= pitch <= 84):\n",
    "        continue\n",
    "\n",
    "    waveform = samples.data.numpy().squeeze()\n",
    "    stft_matrix = librosa.stft(waveform, n_fft=2048, hop_length=512)\n",
    "\n",
    "    # Log-magnitude (the small offset avoids log(0)) and the instantaneous\n",
    "    # frequency derived from the phase along the time axis.\n",
    "    log_magnitude = np.log(np.abs(stft_matrix) + 1.0e-6)\n",
    "    phase = np.angle(stft_matrix)\n",
    "    inst_freq = phase_operation.instantaneous_frequency(phase, time_axis=1)\n",
    "\n",
    "    # Keep the first 1024 rows of each feature, then widen with expand().\n",
    "    magnitude = expand(log_magnitude[:1024])\n",
    "    IF = expand(inst_freq[:1024])\n",
    "    logmelmag2, mel_p = spec_helper.specgrams_to_melspecgrams(magnitude, IF)\n",
    "\n",
    "    assert magnitude.shape == (1024, 128)\n",
    "    assert IF.shape == (1024, 128)\n",
    "\n",
    "    pitch_list.append(pitch)\n",
    "    mel_spec_list.append(logmelmag2)\n",
    "    mel_IF_list.append(mel_p)\n",
    "    pitch_set.add(pitch)\n",
    "\n",
    "    # Report progress once every 10000 kept examples.\n",
    "    count += 1\n",
    "    if not count % 10000:\n",
    "        print(count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the collected features in the HDF5 file and then close the handle.\n",
    "# Closing flushes the data to disk; the try/finally makes sure the file is\n",
    "# released even when one of the writes fails.\n",
    "# NOTE: after this cell has run, `train_data` is closed; re-open the file\n",
    "# before writing anything else to it.\n",
    "try:\n",
    "    # The linear-frequency datasets are currently disabled.\n",
    "    # train_data.create_dataset(\"Spec\", data=spec_list)\n",
    "    # train_data.create_dataset(\"IF\", data=IF_list)\n",
    "    train_data.create_dataset(\"pitch\", data=pitch_list)\n",
    "    train_data.create_dataset(\"mel_Spec\", data=mel_spec_list)\n",
    "    train_data.create_dataset(\"mel_IF\", data=mel_IF_list)\n",
    "finally:\n",
    "    train_data.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
